# loading libraries
library(tidyverse)
library(nycflights13)

# modifying chart size
options(repr.plot.width=5, repr.plot.height=3)

Exercise 1

Find all flights that

  1. Had an arrival delay of two or more hours
  2. Flew to Houston (IAH or HOU)
  3. Were operated by United, American, or Delta (hint: use airline dataset to get the two-character carrier codes)
  4. Departed in summer (July, August, and September)
  5. Arrived more than two hours late, but didn’t leave late
  6. Departed by at least an hour delay, but made up over 30 minutes in flight
  7. Departed between midnight and 6am (inclusive)

Answer:

  1. Had an arrival delay of two or more hours
filter(flights, arr_delay >= 120)
  1. Flew to Houston (IAH or HOU)
filter(flights, dest == "IAH" | dest == "HOU")
# or filter(flights, dest %in% c("IAH", "HOU"))
  1. Were operated by United, American, or Delta (hint: use airline dataset to get the two-character carrier codes)
airlines
filter(flights, carrier %in% c("AA", "DL", "UA"))
  1. Departed in summer (July, August, and September)
filter(flights, month %in%  c(7, 8, 9))
  1. Arrived more than two hours late, but didn’t leave late
filter(flights, dep_delay <= 0, arr_delay > 120)
  1. Departed by at least an hour delay, but made up over 30 minutes in flight
filter(flights, dep_delay >= 60, dep_delay - arr_delay > 30)
  1. Departed between midnight and 6am (inclusive)
filter(flights, dep_time <= 600 | dep_time == 2400)

Exercise 2

Another useful dplyr filtering helper is between(). What does it do? Can you use it to simplify the code needed to answer the previous exercise, part 4?

Answer:

?between
filter(flights, between(month, 7, 9))

Exercise 3

How many flights have a missing dep_time? What other variables are missing? What might these rows represent?

Answer:

8255 flights have missing dep_time.

arr_time is also missing for these rows. Seems to be canceled flights.

filter(flights, is.na(dep_time))

Exercise 4

  1. How could you use arrange() to sort all missing values to the start? (Hint: use is.na() and apply your answer to the df dataframe defined below):
(df <- tibble(x = c(5, 6, 2, NA)))
  1. Apply this answer to flights dataset to list the canceled flights first (assuming NA for dep_delay means the flight was canceled) and then show other flights starting with the ones with large dep_delay in a descending order.

Answer:

# missing value first
arrange(df, !is.na(x))
# missing value first, then descending
arrange(df, !is.na(x), desc(x))
arrange(flights, !is.na(dep_delay), desc(dep_delay))

We could’ve also used desc(is.na(dep_time)) instead of !is.na(dep_delay), desc(dep_delay).


Exercise 5

Brainstorm as many ways as possible to select dep_time, dep_delay, arr_time, and arr_delay from flights.

Answer:

select(flights, dep_time, dep_delay, arr_time, arr_delay)

All the following will result the same:

select(flights, "dep_time", "dep_delay", "arr_time", "arr_delay")

select(flights, 4, 6, 7, 9)

select(flights, 4:9, -c(5,8))

select(flights, starts_with("dep_"), starts_with("arr_"))

variables <- c("dep_time", "dep_delay", "arr_time", "arr_delay")
select(flights, variables)

Exercise 6

What happens if you include the name of a variable multiple times in a select() call?

Answer:

It will show up only once:

select(flights, dep_time, dep_delay, dep_time)

Exercise 7

Does the result of running the following code surprise you? How do the select helpers deal with case by default? Can you change that default?

select(flights, contains("TIME"))

Answer:

select(flights, contains("TIME", ignore.case = FALSE))

ignore.case = TRUE is the default, so we don’t even need to specify it:

select(flights, contains("TIME", ignore.case = TRUE))
LS0tCnRpdGxlOiAiU29sdXRpb24iCnN1YnRpdGxlOiAiRGF0YSBUcmFuc2Zvcm1hdGlvbiAtIFBhcnQgMSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CiMgbG9hZGluZyBsaWJyYXJpZXMKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkobnljZmxpZ2h0czEzKQoKIyBtb2RpZnlpbmcgY2hhcnQgc2l6ZQpvcHRpb25zKHJlcHIucGxvdC53aWR0aD01LCByZXByLnBsb3QuaGVpZ2h0PTMpCmBgYAoKIyMjIEV4ZXJjaXNlIDEKRmluZCBhbGwgZmxpZ2h0cyB0aGF0CgoxLiBIYWQgYW4gYXJyaXZhbCBkZWxheSBvZiB0d28gb3IgbW9yZSBob3VycwoyLiBGbGV3IHRvIEhvdXN0b24gKElBSCBvciBIT1UpCjMuIFdlcmUgb3BlcmF0ZWQgYnkgVW5pdGVkLCBBbWVyaWNhbiwgb3IgRGVsdGEgKGhpbnQ6IHVzZSBgYWlybGluZWAgZGF0YXNldCB0byBnZXQgdGhlIHR3by1jaGFyYWN0ZXIgY2FycmllciBjb2RlcykKNC4gRGVwYXJ0ZWQgaW4gc3VtbWVyIChKdWx5LCBBdWd1c3QsIGFuZCBTZXB0ZW1iZXIpCjUuIEFycml2ZWQgbW9yZSB0aGFuIHR3byBob3VycyBsYXRlLCBidXQgZGlkbid0IGxlYXZlIGxhdGUKNi4gRGVwYXJ0ZWQgYnkgYXQgbGVhc3QgYW4gaG91ciBkZWxheSwgYnV0IG1hZGUgdXAgb3ZlciAzMCBtaW51dGVzIGluIGZsaWdodAo3LiBEZXBhcnRlZCBiZXR3ZWVuIG1pZG5pZ2h0IGFuZCA2YW0gKGluY2x1c2l2ZSkKCioqQW5zd2VyOioqCgoxLiBIYWQgYW4gYXJyaXZhbCBkZWxheSBvZiB0d28gb3IgbW9yZSBob3VycwpgYGB7cn0KZmlsdGVyKGZsaWdodHMsIGFycl9kZWxheSA+PSAxMjApCmBgYAoyLiBGbGV3IHRvIEhvdXN0b24gKElBSCBvciBIT1UpCmBgYHtyfQpmaWx0ZXIoZmxpZ2h0cywgZGVzdCA9PSAiSUFIIiB8IGRlc3QgPT0gIkhPVSIpCiMgb3IgZmlsdGVyKGZsaWdodHMsIGRlc3QgJWluJSBjKCJJQUgiLCAiSE9VIikpCmBgYAoKMy4gV2VyZSBvcGVyYXRlZCBieSBVbml0ZWQsIEFtZXJpY2FuLCBvciBEZWx0YSAoaGludDogdXNlIGBhaXJsaW5lYCBkYXRhc2V0IHRvIGdldCB0aGUgdHdvLWNoYXJhY3RlciBjYXJyaWVyIGNvZGVzKQpgYGB7cn0KYWlybGluZXMKYGBgCgpgYGB7cn0KZmlsdGVyKGZsaWdodHMsIGNhcnJpZXIgJWluJSBjKCJBQSIsICJETCIsICJVQSIpKQpgYGAKCgo0LiBEZXBhcnRlZCBpbiBzdW1tZXIgKEp1bHksIEF1Z3VzdCwgYW5kIFNlcHRlbWJlcikKYGBge3J9CmZpbHRlcihmbGlnaHRzLCBtb250aCAlaW4lICBjKDcsIDgsIDkpKQpgYGAKCjUuIEFycml2ZWQgbW9yZSB0aGFuIHR3byBob3VycyBsYXRlLCBidXQgZGlkbid0IGxlYXZlIGxhdGUKYGBge3J9CmZpbHRlcihmbGlnaHRzLCBkZXBfZGVsYXkgPD0gMCwgYXJyX2RlbGF5ID4gMTIwKQpgYGAKCjYuIERlcGFydGVkIGJ5IGF0IGxlYXN0IGFuIGhvdXIgZGVsYXksIGJ1dCBtYWRlIHVwIG92ZXIgMzAgbWludXRlcyBpbiBmbGlnaHQKYGBge3J9CmZpbHRlcihmbGlnaHRzLCBkZXBfZGVsYXkgPj0gNjAsIGRlcF9kZWxheSAtIGFycl9kZWxheSA+IDMwKQpgYGAKCjcuIERlcGFydGVkIGJldHdlZW4gbWlkbmlnaHQgYW5kIDZhbSAoaW5jbHVzaXZlKQpgYGB7cn0KZmlsdGVyKGZsaWdodHMsIGRlcF90aW1lIDw9IDYwMCB8IGRlcF90aW1lID09IDI0MDApCmBgYAoKLS0tCgojIyMgRXhlcmNpc2UgMgpBbm90aGVyIHVzZWZ1bCBkcGx5ciBmaWx0ZXJpbmcgaGVscGVyIGlzIGBiZXR3ZWVuKClgLiBXaGF0IGRvZXMgaXQgZG8/IENhbiB5b3UgdXNlIGl0IHRvIHNpbXBsaWZ5IHRoZSBjb2RlIG5lZWRlZCB0byBhbnN3ZXIgdGhlIHByZXZpb3VzIGV4ZXJjaXNlLCBwYXJ0IDQ/CgoqKkFuc3dlcjoqKgoKYGBge3IgZXZhbD1GQUxTRX0KP2JldHdlZW4KYGBgCgpgYGB7cn0KZmlsdGVyKGZsaWdodHMsIGJldHdlZW4obW9udGgsIDcsIDkpKQpgYGAKCi0tLQoKIyMjIEV4ZXJjaXNlIDMKSG93IG1hbnkgZmxpZ2h0cyBoYXZlIGEgbWlzc2luZyBgZGVwX3RpbWVgPyBXaGF0IG90aGVyIHZhcmlhYmxlcyBhcmUgbWlzc2luZz8gV2hhdCBtaWdodCB0aGVzZSByb3dzIHJlcHJlc2VudD8KCioqQW5zd2VyOioqCgpgciBmaWx0ZXIoZmxpZ2h0cywgaXMubmEoZGVwX3RpbWUpKSAlPiUgbnJvd2AgZmxpZ2h0cyBoYXZlIG1pc3NpbmcgYGRlcF90aW1lYC4KCmBhcnJfdGltZWAgaXMgYWxzbyBtaXNzaW5nIGZvciB0aGVzZSByb3dzLiBTZWVtcyB0byBiZSBjYW5jZWxlZCBmbGlnaHRzLgoKYGBge3J9CmZpbHRlcihmbGlnaHRzLCBpcy5uYShkZXBfdGltZSkpCmBgYAoKCi0tLQoKIyMjIEV4ZXJjaXNlIDQKMSkgSG93IGNvdWxkIHlvdSB1c2UgYGFycmFuZ2UoKWAgdG8gc29ydCBhbGwgbWlzc2luZyB2YWx1ZXMgdG8gdGhlIHN0YXJ0PyAoSGludDogdXNlIGBpcy5uYSgpYCBhbmQgYXBwbHkgeW91ciBhbnN3ZXIgdG8gdGhlIGBkZmAgZGF0YWZyYW1lIGRlZmluZWQgYmVsb3cpOgoKYGBge3J9CihkZiA8LSB0aWJibGUoeCA9IGMoNSwgNiwgMiwgTkEpKSkKYGBgCgoyKSBBcHBseSB0aGlzIGFuc3dlciB0byBgZmxpZ2h0c2AgZGF0YXNldCB0byBsaXN0IHRoZSBjYW5jZWxlZCBmbGlnaHRzIGZpcnN0IChhc3N1bWluZyBgTkFgIGZvciBgZGVwX2RlbGF5YCBtZWFucyB0aGUgZmxpZ2h0IHdhcyBjYW5jZWxlZCkgYW5kIHRoZW4gc2hvdyBvdGhlciBmbGlnaHRzIHN0YXJ0aW5nIHdpdGggdGhlIG9uZXMgd2l0aCBsYXJnZSBgZGVwX2RlbGF5YCBpbiBhIGRlc2NlbmRpbmcgb3JkZXIuCgoqKkFuc3dlcjoqKgoKMSkKCmBgYHtyfQojIG1pc3NpbmcgdmFsdWUgZmlyc3QKYXJyYW5nZShkZiwgIWlzLm5hKHgpKQpgYGAKCmBgYHtyfQojIG1pc3NpbmcgdmFsdWUgZmlyc3QsIHRoZW4gZGVzY2VuZGluZwphcnJhbmdlKGRmLCAhaXMubmEoeCksIGRlc2MoeCkpCmBgYAoKMikKCmBgYHtyfQphcnJhbmdlKGZsaWdodHMsICFpcy5uYShkZXBfZGVsYXkpLCBkZXNjKGRlcF9kZWxheSkpCmBgYAoKV2UgY291bGQndmUgYWxzbyB1c2VkIGBkZXNjKGlzLm5hKGRlcF90aW1lKSlgIGluc3RlYWQgb2YgYCFpcy5uYShkZXBfZGVsYXkpLCBkZXNjKGRlcF9kZWxheSlgLgoKLS0tCgojIyMgRXhlcmNpc2UgNQpCcmFpbnN0b3JtIGFzIG1hbnkgd2F5cyBhcyBwb3NzaWJsZSB0byBzZWxlY3QgYGRlcF90aW1lYCwgYGRlcF9kZWxheWAsIGBhcnJfdGltZWAsIGFuZCBgYXJyX2RlbGF5YCBmcm9tIGBmbGlnaHRzYC4KCioqQW5zd2VyOioqCgpgYGB7cn0Kc2VsZWN0KGZsaWdodHMsIGRlcF90aW1lLCBkZXBfZGVsYXksIGFycl90aW1lLCBhcnJfZGVsYXkpCmBgYAoKQWxsIHRoZSBmb2xsb3dpbmcgd2lsbCByZXN1bHQgdGhlIHNhbWU6CgpgYGB7ciBldmFsPUZBTFNFfQpzZWxlY3QoZmxpZ2h0cywgImRlcF90aW1lIiwgImRlcF9kZWxheSIsICJhcnJfdGltZSIsICJhcnJfZGVsYXkiKQoKc2VsZWN0KGZsaWdodHMsIDQsIDYsIDcsIDkpCgpzZWxlY3QoZmxpZ2h0cywgNDo5LCAtYyg1LDgpKQoKc2VsZWN0KGZsaWdodHMsIHN0YXJ0c193aXRoKCJkZXBfIiksIHN0YXJ0c193aXRoKCJhcnJfIikpCgp2YXJpYWJsZXMgPC0gYygiZGVwX3RpbWUiLCAiZGVwX2RlbGF5IiwgImFycl90aW1lIiwgImFycl9kZWxheSIpCnNlbGVjdChmbGlnaHRzLCB2YXJpYWJsZXMpCmBgYAoKCi0tLQoKIyMjIEV4ZXJjaXNlIDYKV2hhdCBoYXBwZW5zIGlmIHlvdSBpbmNsdWRlIHRoZSBuYW1lIG9mIGEgdmFyaWFibGUgbXVsdGlwbGUgdGltZXMgaW4gYSBgc2VsZWN0KClgIGNhbGw/CgoqKkFuc3dlcjoqKgoKSXQgd2lsbCBzaG93IHVwIG9ubHkgb25jZToKCmBgYHtyfQpzZWxlY3QoZmxpZ2h0cywgZGVwX3RpbWUsIGRlcF9kZWxheSwgZGVwX3RpbWUpCmBgYAoKLS0tCgojIyMgRXhlcmNpc2UgNwpEb2VzIHRoZSByZXN1bHQgb2YgcnVubmluZyB0aGUgZm9sbG93aW5nIGNvZGUgc3VycHJpc2UgeW91PyBIb3cgZG8gdGhlIGBzZWxlY3RgIGhlbHBlcnMgZGVhbCB3aXRoIGNhc2UgYnkgZGVmYXVsdD8gQ2FuIHlvdSBjaGFuZ2UgdGhhdCBkZWZhdWx0PwoKYGBge3J9CnNlbGVjdChmbGlnaHRzLCBjb250YWlucygiVElNRSIpKQpgYGAKCioqQW5zd2VyOioqCgpgYGB7cn0Kc2VsZWN0KGZsaWdodHMsIGNvbnRhaW5zKCJUSU1FIiwgaWdub3JlLmNhc2UgPSBGQUxTRSkpCmBgYAoKaWdub3JlLmNhc2UgPSBUUlVFIGlzIHRoZSBkZWZhdWx0LCBzbyB3ZSBkb24ndCBldmVuIG5lZWQgdG8gc3BlY2lmeSBpdDoKCmBgYHtyfQpzZWxlY3QoZmxpZ2h0cywgY29udGFpbnMoIlRJTUUiLCBpZ25vcmUuY2FzZSA9IFRSVUUpKQpgYGAK